This Rmarkdonwn document can be used to reproduce the panels in figure 4 of the manuscript: huva: human variation in gene expression as surrogate for gene function. To run this script without any problem of dependencies or conflicts in the installation we suggest to use the docker image we provide (see README file).
In this figure we expand the huva approach to the entire trascriptome. We first run an huva experiment for all present genes in the 500FG dataset and later explore it, fist manually and later with the use of a co-expression network analysis (CoCena).
library(ggplot2)
library(clusterProfiler) # version 3.12.0
library(combinat)
library(ComplexHeatmap)
library(dplyr)
library(ggplot2)
library(graphics)
library(grDevices)
library(grid)
library(igraph)
library(knitr)
library(pcaGoPromoter.Hs.hg19)
library(pheatmap)
library(purrr)
library(RColorBrewer)
library(stringi)
library(tidyr)
library(tidyverse)
library(utils)
library(ggnetwork)
library(intergraph)
library(MCDA)
library(reactome.db)
library(ReactomePA)
library(biomaRt)
library(org.Hs.eg.db) # version 3.8.2
library(Hmisc)
library(gtools)
source("./source/costum_functions.R")
The calculation of the huva experiment was performed with a legacy version of huva (v 0.1.1), the function used can be found loaded in the envoiroment (calculate_network). Here we will simply load the calculated dataset to keep calculation time short. The result of the caclulation with the current version of huva is identical.
load("./data/network_data.RData")
We where fist interested in the analysis of genes strongly influencing the number of circulating monocytes. We visualized them as volcano plot according to the result of the huva experiment
vulcano_cell_type(x = "Monocytes (CD14+)", y = "pval Monocytes (CD14+)") +
ylab("-log10(p value)") +
theme(legend.position = "none") +
ggtitle("Fig. 4b - Volcano Monocytes")
part1 <- list()
part1$CD14_spec <- intersect(row.names(FG500_bin$cell_fc[FG500_bin$cell_fc$`Monocytes (CD14+)`<1,]),
row.names(FG500_bin$cell_pval[FG500_bin$cell_pval$`Monocytes (CD14+)`<0.05,]))
top_genes_plot(data =log2(FG500_bin$cell_fc)*-log10(FG500_bin$cell_pval),
list = part1$CD14_spec,
paramiter = "Monocytes (CD14+)",
fill = "Monocytes (CD14+)",
title = "Fig 4c - Top 20 - Monocytes",
top_n = 20, decreasing = F, log_trans=F) +
theme(legend.position = "none") +
ylab("log2(FC)*-log10(p val)") +
xlab("")
vulcano_cytokine_type(x = "IFNy_PHA_WB_48h", y = "pval IFNy_PHA_WB_48h") +
ylab("-log10(p value)") +
theme(legend.position = "none") +
ggtitle("Fig. S10b - Volcano IFNg")
part1$IFN_spec <- intersect(row.names(FG500_bin$cyto_fc[FG500_bin$cyto_fc$IFNy_PHA_WB_48h<1,]),
row.names(FG500_bin$cyto_pval[FG500_bin$cyto_pval$IFNy_PHA_WB_48h<0.05,]))
top_genes_plot(data =log2(FG500_bin$cyto_fc)*-log10(FG500_bin$cyto_pval),
list = part1$IFN_spec,
paramiter = "IFNy_PHA_WB_48h",
fill = "IFNy_PHA_WB_48h",
title = "Fig. S10c - Top20 - IFNg",
top_n = 20, decreasing = F, log_trans=F) +
theme(legend.position = "none") +
ylab("log2(FC)*-log10(p val)") +
xlab("")
We will now explore the result of the transcriptome-wide huva expreiment we used the Co-expression network analysis tool CoCena2. We generate first a network of huva experiments for the cell counts to identify main regulators for each cell type.
We here load the previously calculated huva experiment results. As robust metric for the calculation of the network we used the product of the log2 fold change and the negative log 10 p value fo the comparison.We also inverted the sign of the result, as described in the manuscript, to ease the interpretation of the results (a negative values represent a negative impact on a specific cell type/cytokine).
load("./data/network_data.RData")
df <- -(log2(FG500_bin$cell_fc)*-log10(FG500_bin$cell_pval))
count_file_name <- df
count_file_name <- count_file_name[, c(1,14,27,33,53,59)]
info_dataset_name <- data.frame(gene=colnames(count_file_name))
info_dataset_name$data_type <- "cell_count"
rownames(info_dataset_name) <- info_dataset_name$gene
info_dataset_name$group <- info_dataset_name$gene
rm(df)
working_directory = "./"
topvar_genes = 3000
voi = "group"
TF_list_name = "TFcat.txt"
gmtfile_name_hallmarks = "h.all.v6.1.symbols.gmt"
gmtfile_name_go = "c5.bp.v7.0.symbols.gmt"
gmtfile_name_kegg = "c2.cp.kegg.v7.0.symbols.gmt"
gmtfile_name_reactome = "c2.cp.reactome.v7.0.symbols.gmt"
organism = "human"
min_corr=0.5
range_cutoff_length=1000
print_distribution_plots = FALSE
min_nodes_number_for_network=20
min_nodes_number_for_cluster=20
data_in_log=T
range_GFC=2.0
layout_algorithm = "layout_with_fr"
mart <- biomaRt::useMart("ensembl")
mart <- biomaRt::listDatasets(mart)
human <- biomaRt::useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
count_table <- count_file_name
universe_Entrez <- clusterProfiler:: bitr(row.names(count_table),
fromType="SYMBOL",
toType="ENTREZID",
OrgDb="org.Hs.eg.db",
drop = T)
info_dataset <- info_dataset_name
TF_list <- read.delim(paste0(working_directory, "reference_files/", TF_list_name),
header=TRUE,
check.names=F)
gmtfile_hallmarks <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_hallmarks))
gmtfile_go <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_go))
gmtfile_kegg <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_kegg))
gmtfile_reactome <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_reactome))
We filter tha data here for the top 3000 most variable huva experiments
ds = count_table[order(apply(count_table,1,var), decreasing=T),]
dd2 <- head(ds,topvar_genes)
dd2 = t(dd2)
source(paste0(working_directory,"source/", "correlation_actions.R"))
source(paste0(working_directory,"source/", "obtain_cutoff_stats.R"))
cutoff_stats = do.call("rbind", lapply(X = range_cutoff,
FUN = cutoff_prep,
corrdf_r = correlation_df_filt,
print.all.plots = print_distribution_plots))
source(paste0(working_directory,"source/", "optimal_cutoff.R"))
## [1] "The calculated optimal cutoff is: 0.971"
optimal_cutoff = calculated_optimal_cutoff
cutoff_wd <- paste0("Cell_count_",optimal_cutoff, "_", topvar_genes)
if(!cutoff_wd %in% list.dirs(working_directory)) {
dir.create(paste0(working_directory,cutoff_wd))}
## Warning in dir.create(paste0(working_directory, cutoff_wd)): './
## Cell_count_0.971_3000' already exists
stats_optimal_cutoff <- cutoff_stats[cutoff_stats$cutoff == optimal_cutoff, c("degree", "Probs")]
filt_cutoff_data = correlation_df_filt %>% dplyr::filter(rval > optimal_cutoff)
filt_cutoff_graph = igraph::graph_from_data_frame(filt_cutoff_data,directed=FALSE)
filt_cutoff_counts = ds[row.names(ds) %in% names(V(filt_cutoff_graph)),]
corresp_info = info_dataset[rownames(dd2)%in%rownames(info_dataset),]
print(paste("After using the optimal cutoff of",optimal_cutoff, "the number of edges =",
nrow(filt_cutoff_data), "and the number of nodes =", nrow(filt_cutoff_counts)))
## [1] "After using the optimal cutoff of 0.971 the number of edges = 61585 and the number of nodes = 2894"
source(paste0(working_directory,"source/", "GFC_calculation.R" ))
GFC <- GFC_calculation(voi_id = voi)
GFC_all_samples <- GFC_calculation(voi_id = "gene")
source(paste0(working_directory,"source/", "cluster_calculation.R" ))
cluster_information <- cluster_calculation(igraph = filt_cutoff_graph,
cluster_algo = "auto",
no_of_iterations = 10,
max_cluster_count_per_gene = 10,
min_cluster_size = min_nodes_number_for_cluster,
GFC = GFC)
cluster_information_all <- cluster_calculation(igraph = filt_cutoff_graph,
cluster_algo = "auto",
no_of_iterations = 10,
max_cluster_count_per_gene = 10,
min_cluster_size = min_nodes_number_for_cluster,
GFC = GFC_all_samples)
source(paste0(working_directory,"source/", "heatmap_clusters.R" ))
heatmap_cluster <- heatmap_clusters(data = cluster_information, cluster_cols = T)
source(paste0(working_directory,"source/", "network_generation.R" ))
return_network <- network_layout(igraph.object = filt_cutoff_graph,
use.layout = layout_algorithm,
min.nodes.number = min_nodes_number_for_network)
igraph_object <- return_network$graph_object
layout_matrix <- return_network$layout
node_attributes <- node_information(igraph.object = igraph_object,
data_df = cluster_information,
GFC_df = GFC,
TF_df = TF_list,
hallmark_df = gmtfile_hallmarks,
go_df = gmtfile_go,
kegg_df = gmtfile_kegg,
reactome_df = gmtfile_reactome,
org = organism)
network_object <- generate_network_object(graph_object = igraph_object,
use_layout = layout_matrix)
source(paste0(working_directory,"source/", "network_visualization.R" ))
network_cluster <- visualize_network(network = network_object,
color.by = "cluster",
select.cluster = NULL,
plot.subnetwork = NULL,
gene.label = NULL,
use.layout = layout_algorithm,
save.pdf=F)
print(network_cluster)
source(paste0(working_directory,"source/", "network_visualization.R" ))
network_GFC <- GFC_colored_network(network = network_object,
select.cluster = NULL,
plot.subnetwork = NULL,
gene.label = NULL,
use.layout = layout_algorithm,
save.pdf = F,
save.single.pdf = F)
gridExtra::marrangeGrob(grobs = network_GFC, ncol = 1, nrow = 1)
source(paste0(working_directory,"source/", "network_visualization.R" ))
STAT1 <- visualize_network(network = network_object,
color.by = "cluster",
select.cluster = c("khaki"),
gene.label = c("STAT1"),
plot.subnetwork = NULL,
use.layout = layout_pers,
save.pdf = F)
print(STAT1)
source(paste0(working_directory,"source/","clusterprofiler_autoCena.R"))
clust_prof= clusterprofiler_autoCena(cluster_data = cluster_information,
cutoff_wd = cutoff_wd,
originalwd = working_directory,
chosen_cutoff = optimal_cutoff,
group = voi)
names(clust_prof) <- cluster_information[cluster_information$cluster_included=="yes",]$color
source("./source/plot_selected_GOEA.R")
load("./data/network_data.RData")
df <- -(log2(FG500_bin$cyto_fc)*-log10(FG500_bin$cyto_pval))
count_file_name <- df
info_dataset_name <- data.frame(gene=colnames(count_file_name))
info_dataset_name$data_type <- "cytokines"
rownames(info_dataset_name) <- info_dataset_name$gene
info_dataset_name$group <- unlist(lapply(strsplit(rownames(info_dataset_name), "_"), `[[`, 1))
info_dataset_name$stim <- unlist(lapply(strsplit(rownames(info_dataset_name), "_"), `[[`, 2))
info_dataset_name$time <- unlist(lapply(strsplit(rownames(info_dataset_name), "_"), `[[`, 4))
rm(df)
working_directory = "./"
topvar_genes = 3000
voi = "group"
TF_list_name = "TFcat.txt"
gmtfile_name_hallmarks = "h.all.v6.1.symbols.gmt"
gmtfile_name_go = "c5.bp.v7.0.symbols.gmt"
gmtfile_name_kegg = "c2.cp.kegg.v7.0.symbols.gmt"
gmtfile_name_reactome = "c2.cp.reactome.v7.0.symbols.gmt"
organism = "human"
min_corr=0.5
range_cutoff_length=1000
print_distribution_plots = FALSE
min_nodes_number_for_network=20
min_nodes_number_for_cluster=20
data_in_log=T
range_GFC=2.0
layout_algorithm = "layout_with_fr"
mart <- biomaRt::useMart("ensembl")
mart <- biomaRt::listDatasets(mart)
human <- biomaRt::useMart("ensembl", dataset = "hsapiens_gene_ensembl")
mouse <- biomaRt::useMart("ensembl", dataset = "mmusculus_gene_ensembl")
count_table <- count_file_name
universe_Entrez <- clusterProfiler:: bitr(row.names(count_table),
fromType="SYMBOL",
toType="ENTREZID",
OrgDb="org.Hs.eg.db",
drop = T)
info_dataset <- info_dataset_name
TF_list <- read.delim(paste0(working_directory, "reference_files/", TF_list_name),
header=TRUE,
check.names=F)
gmtfile_hallmarks <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_hallmarks))
gmtfile_go <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_go))
gmtfile_kegg <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_kegg))
gmtfile_reactome <- clusterProfiler::read.gmt(paste0(working_directory, "reference_files/", gmtfile_name_reactome))
Also here we take the top 3000 huva experiments
ds = count_table[order(apply(count_table,1,var), decreasing=T),]
dd2 <- head(ds,topvar_genes)
dd2 = t(dd2)
source(paste0(working_directory,"source/", "correlation_actions.R"))
source(paste0(working_directory,"source/", "obtain_cutoff_stats.R"))
cutoff_stats = do.call("rbind", lapply(X = range_cutoff,
FUN = cutoff_prep,
corrdf_r = correlation_df_filt,
print.all.plots = print_distribution_plots))
source(paste0(working_directory,"source/", "optimal_cutoff.R"))
## [1] "The calculated optimal cutoff is: 0.674"
optimal_cutoff = calculated_optimal_cutoff
cutoff_wd <- paste0("Cytokines_",optimal_cutoff, "_", topvar_genes)
if(!cutoff_wd %in% list.dirs(working_directory)) {
dir.create(paste0(working_directory,cutoff_wd))}
## Warning in dir.create(paste0(working_directory, cutoff_wd)): './
## Cytokines_0.674_3000' already exists
stats_optimal_cutoff <- cutoff_stats[cutoff_stats$cutoff == optimal_cutoff, c("degree", "Probs")]
filt_cutoff_data = correlation_df_filt %>% dplyr::filter(rval > optimal_cutoff)
filt_cutoff_graph = igraph::graph_from_data_frame(filt_cutoff_data,directed=FALSE)
filt_cutoff_counts = ds[row.names(ds) %in% names(V(filt_cutoff_graph)),]
corresp_info = info_dataset[rownames(dd2)%in%rownames(info_dataset),]
print(paste("After using the optimal cutoff of",optimal_cutoff, "the number of edges =",
nrow(filt_cutoff_data), "and the number of nodes =", nrow(filt_cutoff_counts)))
## [1] "After using the optimal cutoff of 0.674 the number of edges = 159827 and the number of nodes = 2943"
source(paste0(working_directory,"source/", "GFC_calculation.R" ))
GFC <- GFC_calculation(voi_id = voi)
GFC_all_samples <- GFC_calculation(voi_id = "stim")
source(paste0(working_directory,"source/", "cluster_calculation.R" ))
cluster_information <- cluster_calculation(igraph = filt_cutoff_graph,
cluster_algo = "auto",
no_of_iterations = 10,
max_cluster_count_per_gene = 10,
min_cluster_size = min_nodes_number_for_cluster,
GFC = GFC)
source(paste0(working_directory,"source/", "heatmap_clusters.R" ))
heatmap_cluster <- heatmap_clusters(data = cluster_information, cluster_cols = T)
cluster_information_all <- cluster_calculation(igraph = filt_cutoff_graph,
cluster_algo = "auto",
no_of_iterations = 10,
max_cluster_count_per_gene = 10,
min_cluster_size = min_nodes_number_for_cluster,
GFC = GFC_all_samples)
source(paste0(working_directory,"source/", "heatmap_clusters.R" ))
heatmap_cluster_all_samples <- heatmap_clusters(data = cluster_information_all, cluster_cols = T)
source(paste0(working_directory,"source/", "network_generation.R" ))
return_network <- network_layout(igraph.object = filt_cutoff_graph,
use.layout = layout_algorithm,
min.nodes.number = min_nodes_number_for_network)
igraph_object <- return_network$graph_object
layout_matrix <- return_network$layout
node_attributes <- node_information(igraph.object = igraph_object,
data_df = cluster_information,
GFC_df = GFC,
TF_df = TF_list,
hallmark_df = gmtfile_hallmarks,
go_df = gmtfile_go,
kegg_df = gmtfile_kegg,
reactome_df = gmtfile_reactome,
org = organism)
network_object <- generate_network_object(graph_object = igraph_object,
use_layout = layout_matrix)
node_attributes <- node_information(igraph.object = igraph_object,
data_df = cluster_information,
GFC_df = GFC_all_samples,
TF_df = TF_list,
hallmark_df = gmtfile_hallmarks,
go_df = gmtfile_go,
kegg_df = gmtfile_kegg,
reactome_df = gmtfile_reactome,
org = organism)
network_object_all_samples <- generate_network_object(graph_object = igraph_object,
use_layout = layout_matrix)
source(paste0(working_directory,"source/", "network_visualization.R" ))
network_cluster <- visualize_network(network = network_object,
color.by = "cluster",
select.cluster = NULL,
plot.subnetwork = NULL,
gene.label = NULL,
use.layout = layout_algorithm,
save.pdf=F)
print(network_cluster)
source(paste0(working_directory,"source/", "network_visualization.R" ))
network_GFC <- GFC_colored_network(network = network_object,
select.cluster = NULL,
plot.subnetwork = NULL,
gene.label = NULL,
use.layout = layout_algorithm,
save.pdf = F,
save.single.pdf = F)
gridExtra::marrangeGrob(grobs = network_GFC, ncol = 1, nrow = 1)
source(paste0(working_directory,"source/","clusterprofiler_autoCena.R" ))
clust_prof= clusterprofiler_autoCena(cluster_data = cluster_information,
cutoff_wd = cutoff_wd,
originalwd = working_directory,
chosen_cutoff = optimal_cutoff,
group = voi)
names(clust_prof) <- cluster_information[cluster_information$cluster_included=="yes",]$color
source("./source/plot_selected_GOEA_cyto.R")
source("./source/clean.R")
info <- sessionInfo()
info
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 grid stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] GSEABase_1.52.1 graph_1.68.0
## [3] annotate_1.68.0 XML_3.99-0.5
## [5] gtools_3.8.2 Hmisc_4.4-2
## [7] Formula_1.2-4 survival_3.2-7
## [9] lattice_0.20-41 org.Hs.eg.db_3.8.2
## [11] biomaRt_2.46.3 ReactomePA_1.34.0
## [13] reactome.db_1.74.0 AnnotationDbi_1.52.0
## [15] IRanges_2.24.1 S4Vectors_0.28.1
## [17] Biobase_2.50.0 BiocGenerics_0.36.0
## [19] MCDA_0.0.20 intergraph_2.0-2
## [21] ggnetwork_0.5.8 forcats_0.5.1
## [23] stringr_1.4.0 readr_1.4.0
## [25] tibble_3.0.6 tidyverse_1.3.0
## [27] tidyr_1.1.2 stringi_1.5.3
## [29] RColorBrewer_1.1-2 purrr_0.3.4
## [31] pheatmap_1.0.12 pcaGoPromoter.Hs.hg19_1.26.0
## [33] knitr_1.31 igraph_1.2.6
## [35] dplyr_1.0.4 ComplexHeatmap_2.6.2
## [37] combinat_0.0-8 clusterProfiler_3.12.0
## [39] ggplot2_3.3.3
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 shadowtext_0.0.7 backports_1.2.1
## [4] circlize_0.4.12 fastmatch_1.1-0 BiocFileCache_1.14.0
## [7] plyr_1.8.6 splines_4.0.3 BiocParallel_1.24.1
## [10] digest_0.6.27 htmltools_0.5.1.1 GOSemSim_2.16.1
## [13] viridis_0.5.1 GO.db_3.12.1 magrittr_2.0.1
## [16] checkmate_2.0.0 memoise_2.0.0 cluster_2.1.1
## [19] openxlsx_4.2.3 graphlayouts_0.7.1 modelr_0.1.8
## [22] matrixStats_0.58.0 askpass_1.1 enrichplot_1.10.2
## [25] prettyunits_1.1.1 jpeg_0.1-8.1 colorspace_2.0-0
## [28] blob_1.2.1 rvest_0.3.6 rappdirs_0.3.3
## [31] ggrepel_0.9.1 haven_2.3.1 xfun_0.21
## [34] crayon_1.4.1 jsonlite_1.7.2 scatterpie_0.1.5
## [37] Rglpk_0.6-4 glue_1.4.2 polyclip_1.10-0
## [40] gtable_0.3.0 GetoptLong_1.0.5 graphite_1.36.0
## [43] shape_1.4.5 scales_1.1.1 DOSE_3.16.0
## [46] DBI_1.1.1 Rcpp_1.0.6 xtable_1.8-4
## [49] htmlTable_2.1.0 viridisLite_0.3.0 progress_1.2.2
## [52] clue_0.3-58 foreign_0.8-81 bit_4.0.4
## [55] htmlwidgets_1.5.3 httr_1.4.2 fgsea_1.12.0
## [58] ellipsis_0.3.1 pkgconfig_2.0.3 farver_2.0.3
## [61] nnet_7.3-15 dbplyr_2.1.0 labeling_0.4.2
## [64] tidyselect_1.1.0 rlang_0.4.10 reshape2_1.4.4
## [67] munsell_0.5.0 cellranger_1.1.0 tools_4.0.3
## [70] cachem_1.0.4 cli_2.3.0 generics_0.1.0
## [73] RSQLite_2.2.3 broom_0.7.4 evaluate_0.14
## [76] fastmap_1.1.0 yaml_2.2.1 bit64_4.0.5
## [79] fs_1.5.0 tidygraph_1.2.0 zip_2.1.1
## [82] ggraph_2.0.4 slam_0.1-48 DO.db_2.9
## [85] xml2_1.3.2 compiler_4.0.3 rstudioapi_0.13
## [88] curl_4.3 png_0.1-7 reprex_1.0.0
## [91] tweenr_1.0.1 highr_0.8 Matrix_1.3-2
## [94] vctrs_0.3.6 pillar_1.4.7 lifecycle_1.0.0
## [97] BiocManager_1.30.12 GlobalOptions_0.1.2 data.table_1.13.6
## [100] cowplot_1.1.1 qvalue_2.22.0 latticeExtra_0.6-29
## [103] R6_2.5.0 network_1.16.1 gridExtra_2.3
## [106] MASS_7.3-53.1 assertthat_0.2.1 openssl_1.4.3
## [109] rjson_0.2.20 withr_2.4.1 hms_1.0.0
## [112] rpart_4.1-15 glpkAPI_1.3.2 rmarkdown_2.6
## [115] rvcheck_0.1.8 Cairo_1.5-12.2 ggforce_0.3.2
## [118] base64enc_0.1-3 lubridate_1.7.9.2
save.image(paste("./data/",Sys.Date(), "_huva_Figure_4.RData", sep = ""))